import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the two input tables (paths are resolved against the working directory)
data_aah, data_cox = (
    pd.read_csv(csv_path) for csv_path in ('data_aah.csv', 'data_cox.csv')
)

# Time points 5, 10, ..., 70
times = np.arange(start=5, stop=71, step=5)

# Column-name suffixes: the string form of every time point
suffixes = list(map(str, times))

# Function to calculate data
def calculate_sum_values(data, suffix_list=None):
    """Sum per-week log-mean-exp values for each suffix and compare two column families.

    For every suffix, the columns ``new_st_estimate_<suffix>`` and
    ``predict_action_t_st_estimate_<suffix>`` are grouped by ``study_week``.
    Within each week the values are exponentiated, averaged and taken back
    to the log scale; the per-week results are then summed over all weeks.

    Args:
        data: DataFrame holding a ``study_week`` column plus both estimate
            columns for every requested suffix.
        suffix_list: Iterable of column-name suffixes to process. When
            omitted, the module-level ``suffixes`` is used.

    Returns:
        A ``(diff_values, new_st_sum_values)`` tuple of lists with one entry
        per suffix. ``new_st_sum_values`` holds the summed values of the
        ``new_st_estimate_*`` columns; ``diff_values`` holds that sum minus
        the matching ``predict_action_t_st_estimate_*`` sum (a plain
        difference, not a ratio).

    Raises:
        KeyError: If ``study_week`` or one of the expected estimate columns
            is missing from ``data``.
    """
    if suffix_list is None:
        suffix_list = suffixes

    # The grouping does not depend on the suffix, so build it only once.
    by_week = data.groupby('study_week')

    new_st_sum_values = []
    diff_values = []
    for suffix in suffix_list:
        new_st_total = by_week[f'new_st_estimate_{suffix}'].apply(_log_mean_exp).sum()
        predict_total = (
            by_week[f'predict_action_t_st_estimate_{suffix}'].apply(_log_mean_exp).sum()
        )

        new_st_sum_values.append(new_st_total)
        diff_values.append(new_st_total - predict_total)

    return diff_values, new_st_sum_values


def _log_mean_exp(values):
    """Return log(mean(exp(values))) without overflowing for large inputs."""
    peak = values.max()
    # A NaN or infinite maximum makes the shift below meaningless; the naive
    # formula yields exactly that value (nan, inf or -inf) in these cases.
    if not np.isfinite(peak):
        return peak
    # Subtracting the maximum keeps every exponent <= 0, so exp() cannot
    # overflow; adding it back afterwards restores the true result.
    return peak + np.log(np.exp(values - peak).mean())

# Run the aggregation once per dataset; each call yields the per-suffix
# differences together with the per-suffix new_st sums.
diff_aah, new_st_aah = calculate_sum_values(data_aah)
diff_cox, new_st_cox = calculate_sum_values(data_cox)

# Report the differences, one line per dataset
for _heading, _diffs in (("Difference Aah:", diff_aah), ("Difference Cox:", diff_cox)):
    print(_heading, _diffs)